* Take raw patient data for AMI patients and add required controls for main FE analysis

* Session setup
clear all
set more off
pause on

* Load the AMI readmission/death extract
use "$savedata/ami_readmit_death_evercardio_0517", clear

* Within each (extract, ami_date) group keep only the rows whose finyear
* equals the lowest finyear observed in that group
tempvar first_fy
egen `first_fy' = min(finyear), by(extract ami_date)
drop if finyear != `first_fy'
drop `first_fy'


* Area-level controls
* -------------------

* MSOA-level variables: only observations matched in both files are kept
rename soam msoa
merge m:1 msoa using "$inputs/msoa_vars.dta", keep(match) nogenerate ///
    keepusing(privnjr_absdist2010 msoa_imdscore_all pop_density med_wait ave_wait ///
              early_ft teaching hp_i sales_i stand_ami)

* LSOA-level variables: only matched observations are kept
* (_merge is intentionally left in the data here; it is dropped further down)
rename soal lsoa
merge m:1 lsoa using "$inputs/lsoa_vars.dta", keep(match) ///
    keepusing(lsoa_imdscore10 lsoa_imdrank10 lsoa_density01 lsoa_wa_benefits)


* Health-related controls
* -----------------------

** Was the heart attack a STEMI?
* stemi = 1 when any of the diagnosis fields fulldiag3_1 ... fulldiag3_20
* holds one of the listed codes, and 0 otherwise
gen stemi = 0
forvalues pos = 1/20 {
    replace stemi = 1 if inlist(fulldiag3_`pos', "I229", "I211", "I212", "I213", "I220", "I221", "I222")
}


* Build the Charlson comorbidity measures, one financial year at a time.
* Each year's result is written to its own file in $savedata.
forvalues fy = 2005/2017 {
    preserve
        keep if finyear == `fy'
        * Charlson index (a measure of comorbidity) from the fulldiag3_* fields
        charlson, index(10) idvar(extract) diagprfx(fulldiag3_) wtchrl cmorb
        gen finyear = `fy'
        save "$savedata/charlson_`fy'", replace
    restore
}


* Attach the yearly Charlson files to the patient data.
* update: later files only fill values that are still missing in memory.
set more off
drop _merge
forvalues fy = 2005/2017 {
    merge m:1 extract finyear using "$savedata/charlson_`fy'", update
    drop if _merge == 2
    drop _merge
}


* Ethnicity indicators (0/1) derived from the ethnos code
gen white     = inlist(ethnos, "A", "B", "C")

gen mixed     = inlist(ethnos, "D", "E", "F", "G", "S")

gen black     = inlist(ethnos, "N", "M", "P")

gen asian     = inlist(ethnos, "H", "J", "L", "K")

gen chinese   = (ethnos == "R")

* Ethnicity recorded with one of the codes 99 / X / Z
gen race_miss = inlist(ethnos, "99", "X", "Z")


*  In a robustness check I use ED data (only available from 2007, and reliably from 2009) - merge this in here
*  Note: Even for full coverage years, the merge is only around 70% - this is related to organisation of cardiac care at different hospitals (some patients taken directly to cardiology department)
*
*  For every ED year two passes are made against the same yearly file:
*    pass 1 matches on an ED exit date equal to the AMI date,
*    pass 2 matches on an ED exit date one day before the AMI date.
*  The merge indicators are kept as _mergeYYYY (pass 1) and _mergeYYYY_2 (pass 2).
*  anymerge is set to 1 when either pass matched (merge codes 3, 4 or 5).

* ED variables pulled from each yearly file (identical for both passes)
local ed_vars aearrivalmode aeattenddisp aedepttype initdur tretdur concldur depdur arrivaltime inittime trettime concltime exittime sushrg admit discharge arrivaldate arrivalhour exithour d_male d_age d_ambulance d1 dcount t1 tcount i1 icount

gen anymerge = 0

forvalues year = 2007/2017 {

    * ---- Pass 1: ED exit on the day of the AMI ----
    gen exitdate = ami_date

    * Note: force is only needed for one str/byte variable (which is empty in the using data), for 2007 only
    merge 1:m extract exitdate using "$eddata/data_ae`year'", update force keepusing(`ed_vars')
    drop if _merge == 2

    rename _merge _merge`year'
    replace anymerge = 1 if inlist(_merge`year', 3, 4, 5)

    * A 1:m merge can attach several ED records to one patient-day:
    * keep a single row per extract-exitdate, the last one when ordered by exithour
    bysort extract exitdate (exithour): keep if _n == _N

    * ---- Pass 2: ED exit on the day before the AMI ----
    replace exitdate = ami_date - 1

    merge 1:m extract exitdate using "$eddata/data_ae`year'", update force keepusing(`ed_vars')
    drop if _merge == 2

    rename _merge _merge`year'_2
    * Fix: test the indicator of THIS merge. The original re-tested the pass-1
    * indicator here, so day-before matches never set anymerge.
    replace anymerge = 1 if inlist(_merge`year'_2, 3, 4, 5)

    bysort extract exitdate (exithour): keep if _n == _N

    drop exitdate
}

compress


* Out-of-hospital mortality data for AMI patients
* The merge key encrypted_hesid is rebuilt from extract_hesid before merging

drop encrypted_hesid
gen encrypted_hesid = extract_hesid
capture drop _merge

merge m:1 encrypted_hesid using "$mortalitydata/hes_mortality_with_hesid_resupply.dta"
drop if _merge == 2

* Split dod into year / month / day (character positions 1-4, 6-7 and 9-10)
* and convert the three pieces to numeric
gen death_year  = substr(dod, 1, 4)
gen death_month = substr(dod, 6, 2)
gen death_day   = substr(dod, 9, 2)

destring death_year death_month death_day, replace

* Days between the AMI and death; negative gaps are set to missing
gen death_date   = mdy(death_month, death_day, death_year)
gen time_elapsed = death_date - ami_date
replace time_elapsed = . if time_elapsed < 0

* Death indicators by horizon (in days). A missing time_elapsed compares as
* larger than any number, so those rows get 0.
foreach horizon of numlist 7 30 90 180 365 730 {
    gen all_death`horizon' = (time_elapsed <= `horizon')
}

* Numeric identifiers for consultants and trusts
encode pconsult,   gen(doctor_id)
encode trust_code, gen(trust_num)

* Cleaning
* Remove records with a missing consultant code
drop if inlist(pconsult, "99", "&")
* Remove non-feasible and missing ages (inrange() returns 0 for a missing age)
drop if !inrange(derv_age, 0, 100)
* Remove records with missing sex
drop if sex == .

** Generate IDs of the people who I want to extract previous treatment for - use this in file X to create past IP costs
* ("file X" is presumably the do-file run further down in this section - confirm)
* The data in memory are preserved, reduced to the first row of each extract,
* written to amipats_ids0517.dta, and then restored to the full data set.
preserve
* xx numbers the rows within each extract; xx stays in the saved ID file
bys extract: gen xx=_n
keep if xx==1
save "$savedata/amipats_ids0517.dta", replace
* Displays the number of rows written to the ID file
count
restore

* Park the full working data in a temporary file so it can be reloaded
* after the helper do-file has run
tempfile temp
save `temp'

* Run do-file here to create past hospital costs
* NOTE(review): this preserve has no matching plain restore; the preserved copy
* is discarded later in the file with "restore, not"
preserve

**NOW RUNNING DO FILE THAT CREATES PAST IP TREATMENT COST
do "$dofile/4a. create_past_treatment_costs.do"

* Reload the working data saved in the temporary file, replacing whatever
* the helper do-file left in memory
use `temp', clear

** Merge in past hospital costs
capture drop _merge
* Fix: the path originally started with a stray "/" in front of $savedata,
* unlike every other $savedata path in this file
merge m:1 extract_hesid using "$savedata/pastip_0417.dta"
drop if _merge == 2

* Assign zero costs to missing values - the missings are all patients who didn't have any hospital admissions in that year (and therefore no hospital costs)
local past_stems fce_cost fce_cost_el fce_cost_em los ami stroke n_admit n_em_admit n_el_admit n_all_emdiag
forvalues yr = 2004/2017 {
    foreach stem of local past_stems {
        replace `stem'`yr' = 0 if `stem'`yr' == .
    }
}

* Generate the cost (and the other past-treatment measures) in the previous financial year.
* src_stems and out_names are parallel lists: the k-th source stem, taken for
* year finyear-1, feeds the variable prevyear_<k-th output name>.
local src_stems fce_cost fce_cost_el fce_cost_em stroke ami los n_admit n_em_admit n_el_admit n_all_emdiag
local out_names cost     cost_el     cost_em     stroke ami los admit   admit_em   admit_el   emdiag
local n_vars : word count `src_stems'

forvalues k = 1/`n_vars' {
    local src : word `k' of `src_stems'
    local out : word `k' of `out_names'

    * finyear 2005 takes its values from 2004 ...
    gen prevyear_`out' = `src'2004 if finyear == 2005

    * ... and every later finyear takes them from the year before it
    forvalues fy = 2006/2017 {
        local lag = `fy' - 1
        replace prevyear_`out' = `src'`lag' if finyear == `fy'
    }
}

drop _merge

*****************
* Add in costs based on 2014/15 NHS grouper, and collapsed by procedure or (if missing) diagnosis [CAN I PROVIDE THESE COSTS? SHOULD DO, AND ADD IN README WHERE IT'S FROM]

* Constants and merge keys: elective is part of both merge keys below,
* diag_01 is the first four characters of the first diagnosis field
gen elective  = 0
gen emergency = 1

gen diag_01   = substr(fulldiag3_1, 1, 4)

* Imputed cost based on primary operation.
* Rows that exist only in the cost file are not kept.
merge m:1 opertn_01 elective using "$inputs/oper_imp_cost.dta", keep(master match)

gen imputed_cost_op = fce_cost if _merge == 3
drop _merge fce_cost

* Imputed cost based on primary diagnosis (for those with no operation).
* Rows that exist only in the cost file are not kept.
merge m:1 diag_01 elective using "$inputs/diag_no_oper_imp_cost.dta", keep(master match) nogenerate

* The operation-based cost takes precedence whenever it is available
replace fce_cost = imputed_cost_op if !missing(imputed_cost_op)

* Drop the (very few) observations without an imputed cost
drop if fce_cost == .

*/

* Patient descriptives
* male = 1 when sex == 1 and 0 when sex == 2; rows with any other sex value are dropped
gen male = 1 - (sex - 1)
keep if inlist(male, 0, 1)

* Main analysis file
save "$savedata/ami_analysis_0517.dta", replace

* Cancel the outstanding preserve: the data currently in memory are kept
restore, not

* Sub-sample: patients treated by doctors whose main registered specialty is cardiology
keep if mainspef == "320"
* Keep only those without heart attacks in the previous year (worried about matching between docs and patients here)
keep if prevyear_ami == 0
save "$savedata/ami_analysis_mainspef.dta", replace

